import numpy as np
import pandas as pd
import matplotlib.pyplot as plt
import seaborn as sns
import warnings
warnings.filterwarnings("ignore")
# Load the automobile dataset into a DataFrame.
# NOTE(review): the path is absolute and machine-specific, so this only runs
# where that exact file exists.
automobile=pd.read_csv(r"E:\Ankit Jain\D drive\Aviraj Personal File\IMS Analytics Class\Github sets\Automobile\Automobile.csv")
# Preview 15 randomly sampled rows, transposed so every sampled car is a column.
automobile.sample(15).T
| 156 | 191 | 171 | 19 | 185 | 39 | 40 | 98 | 157 | 107 | 170 | 55 | 17 | 194 | 87 | |
|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|
| symboling | 0 | -1 | -1 | 0 | 3 | 0 | 0 | 0 | 0 | 0 | -1 | 3 | 2 | -2 | 1 |
| normalized_losses | 91 | 74 | 65 | 81 | 137 | 85 | 85 | 128 | 91 | 161 | 65 | 150 | 121 | 103 | 128 |
| make | toyota | volvo | toyota | chevrolet | volkswagen | honda | honda | nissan | toyota | peugot | toyota | mazda | chevrolet | volvo | nissan |
| fuel_type | gas | gas | gas | gas | gas | gas | gas | gas | gas | diesel | diesel | gas | gas | gas | diesel |
| aspiration | std | std | std | std | std | std | std | std | std | turbo | turbo | std | std | turbo | std |
| number_of_doors | four | four | four | four | two | four | four | four | four | four | four | two | two | four | two |
| body_style | sedan | wagon | hatchback | sedan | convertible | sedan | sedan | sedan | hatchback | wagon | sedan | hatchback | hatchback | sedan | sedan |
| drive_wheels | fwd | rwd | fwd | fwd | fwd | fwd | fwd | fwd | fwd | rwd | fwd | rwd | fwd | rwd | fwd |
| engine_location | front | front | front | front | front | front | front | front | front | front | front | front | front | front | front |
| wheel_base | 95.7 | 104.3 | 102.4 | 94.5 | 94.5 | 96.5 | 96.5 | 100.4 | 95.7 | 114.2 | 102.4 | 95.3 | 88.4 | 104.3 | 94.5 |
| length | 166.3 | 188.8 | 175.6 | 158.8 | 159.3 | 175.4 | 175.4 | 181.7 | 166.3 | 198.9 | 175.6 | 169 | 141.1 | 188.8 | 165.3 |
| width | 64.4 | 67.2 | 66.5 | 63.6 | 64.2 | 62.5 | 65.2 | 66.5 | 64.4 | 68.4 | 66.5 | 65.7 | 60.3 | 67.2 | 63.8 |
| height | 53 | 57.5 | 53.9 | 52 | 55.6 | 54.1 | 54.1 | 55.1 | 52.8 | 58.7 | 54.9 | 49.6 | 53.2 | 56.2 | 54.5 |
| curb_weight | 2094 | 3034 | 2414 | 1909 | 2254 | 2372 | 2465 | 3095 | 2122 | 3430 | 2480 | 2500 | 1488 | 3045 | 2017 |
| engine_type | ohc | ohc | ohc | ohc | ohc | ohc | ohc | ohcv | ohc | l | ohc | rotor | l | ohc | ohc |
| number_of_cylinders | four | four | four | four | four | four | four | six | four | four | four | two | three | four | four |
| engine_size | 98 | 141 | 122 | 90 | 109 | 110 | 110 | 181 | 98 | 152 | 110 | 80 | 61 | 130 | 103 |
| fuel_system | 2bbl | mpfi | mpfi | 2bbl | mpfi | 1bbl | mpfi | mpfi | 2bbl | idi | idi | mpfi | 2bbl | mpfi | idi |
| bore | 3.19 | 3.78 | 3.31 | 3.03 | 3.19 | 3.15 | 3.15 | 3.43 | 3.19 | 3.7 | 3.27 | 3.28 | 2.91 | 3.62 | 2.99 |
| stroke | 3.03 | 3.15 | 3.54 | 3.11 | 3.4 | 3.58 | 3.58 | 3.27 | 3.03 | 3.52 | 3.35 | 3.5 | 3.03 | 3.15 | 3.47 |
| compression_ratio | 9 | 9.5 | 8.7 | 9.6 | 8.5 | 9 | 9 | 9 | 9 | 21 | 22.5 | 9.4 | 9.5 | 7.5 | 21.9 |
| horsepower | 70 | 114 | 92 | 70 | 90 | 86 | 101 | 152 | 70 | 95 | 73 | 135 | 48 | 162 | 55 |
| peak_rpm | 4800 | 5400 | 4200 | 5400 | 5500 | 5800 | 5800 | 5200 | 4800 | 4150 | 4500 | 6000 | 5100 | 5100 | 4800 |
| city_mpg | 38 | 23 | 27 | 38 | 24 | 27 | 24 | 17 | 28 | 25 | 30 | 16 | 47 | 17 | 45 |
| highway_mpg | 47 | 28 | 32 | 43 | 29 | 33 | 28 | 22 | 34 | 25 | 33 | 23 | 53 | 22 | 50 |
| price | 7738 | 13415 | 9988 | 6575 | 11595 | 10295 | 12945 | 13499 | 8358 | 13860 | 10698 | 15645 | 5151 | 18420 | 7099 |
# Print each column's dtype and non-null count.
automobile.info()
<class 'pandas.core.frame.DataFrame'> RangeIndex: 201 entries, 0 to 200 Data columns (total 26 columns): # Column Non-Null Count Dtype --- ------ -------------- ----- 0 symboling 201 non-null int64 1 normalized_losses 201 non-null int64 2 make 201 non-null object 3 fuel_type 201 non-null object 4 aspiration 201 non-null object 5 number_of_doors 201 non-null object 6 body_style 201 non-null object 7 drive_wheels 201 non-null object 8 engine_location 201 non-null object 9 wheel_base 201 non-null float64 10 length 201 non-null float64 11 width 201 non-null float64 12 height 201 non-null float64 13 curb_weight 201 non-null int64 14 engine_type 201 non-null object 15 number_of_cylinders 201 non-null object 16 engine_size 201 non-null int64 17 fuel_system 201 non-null object 18 bore 201 non-null float64 19 stroke 201 non-null float64 20 compression_ratio 201 non-null float64 21 horsepower 201 non-null int64 22 peak_rpm 201 non-null int64 23 city_mpg 201 non-null int64 24 highway_mpg 201 non-null int64 25 price 201 non-null int64 dtypes: float64(7), int64(9), object(10) memory usage: 41.0+ KB
# (rows, columns) of the dataset.
automobile.shape
(201, 26)
# Summary statistics, transposed so each described column is one row.
automobile.describe().T
| count | mean | std | min | 25% | 50% | 75% | max | |
|---|---|---|---|---|---|---|---|---|
| symboling | 201.0 | 0.840796 | 1.254802 | -2.00 | 0.00 | 1.00 | 2.00 | 3.00 |
| normalized_losses | 201.0 | 125.189055 | 33.572966 | 65.00 | 101.00 | 122.00 | 150.00 | 256.00 |
| wheel_base | 201.0 | 98.797015 | 6.066366 | 86.60 | 94.50 | 97.00 | 102.40 | 120.90 |
| length | 201.0 | 174.200995 | 12.322175 | 141.10 | 166.80 | 173.20 | 183.50 | 208.10 |
| width | 201.0 | 65.889055 | 2.101471 | 60.30 | 64.10 | 65.50 | 66.60 | 72.00 |
| height | 201.0 | 53.766667 | 2.447822 | 47.80 | 52.00 | 54.10 | 55.50 | 59.80 |
| curb_weight | 201.0 | 2555.666667 | 517.296727 | 1488.00 | 2169.00 | 2414.00 | 2926.00 | 4066.00 |
| engine_size | 201.0 | 126.875622 | 41.546834 | 61.00 | 98.00 | 120.00 | 141.00 | 326.00 |
| bore | 201.0 | 3.329701 | 0.268166 | 2.54 | 3.15 | 3.31 | 3.58 | 3.94 |
| stroke | 201.0 | 3.261741 | 0.317875 | 2.07 | 3.11 | 3.29 | 3.46 | 4.17 |
| compression_ratio | 201.0 | 10.164279 | 4.004965 | 7.00 | 8.60 | 9.00 | 9.40 | 23.00 |
| horsepower | 201.0 | 103.263682 | 37.389372 | 48.00 | 70.00 | 95.00 | 116.00 | 262.00 |
| peak_rpm | 201.0 | 5121.393035 | 479.624905 | 4150.00 | 4800.00 | 5200.00 | 5500.00 | 6600.00 |
| city_mpg | 201.0 | 25.179104 | 6.423220 | 13.00 | 19.00 | 24.00 | 30.00 | 49.00 |
| highway_mpg | 201.0 | 30.686567 | 6.815150 | 16.00 | 25.00 | 30.00 | 34.00 | 54.00 |
| price | 201.0 | 13207.129353 | 7947.066342 | 5118.00 | 7775.00 | 10295.00 | 16500.00 | 45400.00 |
# Print the distinct values of every column, with a blank line between columns.
for column in automobile:
    print(automobile[column].unique())
    print()
[ 3 1 2 0 -1 -2] [168 164 161 158 192 188 149 121 98 81 118 148 110 145 137 101 78 106 85 107 115 104 113 150 129 119 105 93 122 142 140 153 139 125 128 103 108 194 231 154 74 141 186 83 102 89 87 77 91 134 65 197 90 133 94 256 132 95] ['alfa-romero' 'audi' 'bmw' 'chevrolet' 'dodge' 'honda' 'isuzu' 'jaguar' 'mazda' 'mercedes-benz' 'mercury' 'mitsubishi' 'nissan' 'peugot' 'plymouth' 'porsche' 'renault' 'saab' 'subaru' 'toyota' 'volkswagen' 'volvo'] ['gas' 'diesel'] ['std' 'turbo'] ['two' 'four'] ['convertible' 'hatchback' 'sedan' 'wagon' 'hardtop'] ['rwd' 'fwd' '4wd'] ['front' 'rear'] [ 88.6 94.5 99.8 99.4 105.8 101.2 103.5 110. 88.4 93.7 103.3 95.9 86.6 96.5 94.3 96. 113. 102. 93.1 95.3 98.8 104.9 106.7 115.6 96.6 120.9 112. 102.7 93. 96.3 95.1 97.2 100.4 91.3 99.2 107.9 114.2 108. 89.5 96.1 99.1 93.3 97. 96.9 95.7 98.4 102.4 102.9 104.5 97.3 104.3 109.1] [168.8 171.2 176.6 177.3 192.7 176.8 189. 193.8 197. 141.1 155.9 158.8 157.3 174.6 173.2 144.6 150. 163.4 157.1 167.5 175.4 169.1 170.7 172.6 199.6 191.7 159.1 166.8 169. 177.8 175. 190.9 187.5 202.6 180.3 208.1 199.2 178.4 173. 172.4 165.3 170.2 165.6 162.4 173.4 181.7 184.6 178.5 186.7 198.9 167.3 168.9 181.5 186.6 156.9 157.9 172. 173.5 173.6 158.7 169.7 166.3 168.7 176.2 175.6 183.5 187.8 171.7 159.3 165.7 180.2 183.1 188.8] [64.1 65.5 66.2 66.4 66.3 71.4 64.8 66.9 67.9 70.9 60.3 63.6 63.8 64.6 63.9 64. 65.2 62.5 66. 61.8 69.6 70.6 64.2 65.7 66.5 66.1 70.3 71.7 70.5 72. 68. 64.4 65.4 68.4 68.3 65. 66.6 63.4 65.6 67.7 67.2 68.9 68.8] [48.8 52.4 54.3 53.1 55.7 55.9 53.7 56.3 53.2 52. 50.8 50.6 59.8 50.2 52.6 54.5 58.3 53.3 54.1 51. 53.5 51.4 52.8 47.8 49.6 55.5 54.4 56.5 58.7 54.9 56.7 55.4 54.8 49.4 51.6 54.7 55.1 56.1 49.7 56. 55.2 50.5 52.5 53. 
59.1 53.9 55.6 56.2 57.5] [2548 2823 2337 2824 2507 2844 2954 3086 2395 2710 2765 3055 3230 3380 3505 1488 1874 1909 1876 2128 1967 1989 2191 2535 2811 1713 1819 1837 1940 1956 2010 2024 2236 2289 2304 2372 2465 2293 2734 4066 3950 1890 1900 1905 1945 1950 2380 2385 2500 2410 2443 2425 2670 2700 3515 3750 3495 3770 3740 3685 3900 3715 2910 1918 1944 2004 2145 2370 2328 2833 2921 2926 2365 2405 2403 1889 2017 1938 1951 2028 1971 2037 2008 2324 2302 3095 3296 3060 3071 3139 3020 3197 3430 3075 3252 3285 3485 3130 2818 2778 2756 2800 2579 2460 2658 2695 2707 2758 2808 2847 2050 2120 2240 2190 2340 2510 2290 2455 2420 2650 1985 2040 2015 2280 3110 2081 2109 2275 2094 2122 2140 2169 2204 2265 2300 2540 2536 2551 2679 2714 2975 2326 2480 2414 2458 2976 3016 3131 3151 2261 2209 2264 2212 2319 2254 2221 2661 2563 2912 3034 2935 3042 3045 3157 2952 3049 3012 3217 3062] ['dohc' 'ohcv' 'ohc' 'l' 'rotor' 'ohcf'] ['four' 'six' 'five' 'three' 'twelve' 'two' 'eight'] [130 152 109 136 131 108 164 209 61 90 98 122 156 92 79 110 111 119 258 326 91 70 80 140 134 183 234 308 304 97 103 120 181 151 194 132 121 146 171 161 141 173 145] ['mpfi' '2bbl' 'mfi' '1bbl' 'spfi' '4bbl' 'idi' 'spdi'] [3.47 2.68 3.19 3.13 3.5 3.31 3.62 2.91 3.03 2.97 3.34 3.6 2.92 3.15 3.43 3.63 3.54 3.08 3.28 3.39 3.76 3.58 3.46 3.8 3.78 3.17 3.35 3.59 2.99 3.33 3.7 3.61 3.94 3.74 2.54 3.05 3.27 3.24 3.01] [2.68 3.47 3.4 2.8 3.19 3.39 3.03 3.11 3.23 3.46 3.9 3.41 3.07 3.58 4.17 2.76 3.15 3.5 3.16 3.64 3.1 3.35 3.12 3.86 3.29 3.27 3.52 2.19 3.21 2.9 2.07 2.36 2.64 3.08 3.54 2.87] [ 9. 10. 8. 8.5 8.3 8.8 9.5 9.6 9.41 9.4 7.6 7. 9.2 10.1 9.1 8.1 11.5 8.6 22.7 22. 21.5 7.5 21.9 7.8 8.4 21. 8.7 9.31 9.3 7.7 22.5 23. 
] [111 154 102 115 110 140 101 121 182 48 70 68 88 145 58 76 60 86 100 78 90 176 262 135 84 64 120 72 123 155 184 175 116 69 55 97 152 160 200 95 142 143 207 73 82 94 62 56 112 92 161 156 52 85 114 162 134 106] [5000 5500 5800 4250 5400 5100 4800 6000 4750 4650 4200 4350 4500 5200 4150 5600 5900 5250 4900 4400 6600 5300] [21 19 24 18 17 23 20 16 15 47 38 37 31 49 30 27 25 13 26 36 22 14 45 28 32 35 34 29 33] [27 26 30 22 25 20 29 28 53 43 41 38 24 54 42 34 33 31 19 17 23 32 39 18 16 37 50 36 47 46] [13495 16500 13950 17450 15250 17710 18920 23875 16430 16925 20970 21105 24565 30760 41315 36880 5151 6295 6575 5572 6377 7957 6229 6692 7609 8558 8921 12964 6479 6855 5399 6529 7129 7295 7895 9095 8845 10295 12945 10345 6785 11048 32250 35550 36000 5195 6095 6795 6695 7395 10945 11845 13645 15645 8495 10595 10245 10795 11245 18280 18344 25552 28248 28176 31600 34184 35056 40960 45400 16503 5389 6189 6669 7689 9959 8499 12629 14869 14489 6989 8189 9279 5499 7099 6649 6849 7349 7299 7799 7499 7999 8249 8949 9549 13499 14399 17199 19699 18399 11900 13200 12440 13860 15580 16900 16695 17075 16630 17950 18150 12764 22018 32528 34028 37028 9295 9895 11850 12170 15040 15510 18620 5118 7053 7603 7126 7775 9960 9233 11259 7463 10198 8013 11694 5348 6338 6488 6918 7898 8778 6938 7198 7788 7738 8358 9258 8058 8238 9298 9538 8449 9639 9989 11199 11549 17669 8948 10698 9988 10898 11248 16558 15998 15690 15750 7975 7995 8195 9495 9995 11595 9980 13295 13845 12290 12940 13415 15985 16515 18420 18950 16845 19045 21485 22470 22625]
# Print a frequency table for every categorical (object-dtype) column,
# with a blank line after each table.
for string in automobile.select_dtypes(include="object"):
    print(automobile[string].value_counts())
    print()
toyota 32 nissan 18 mazda 17 mitsubishi 13 honda 13 subaru 12 volkswagen 12 peugot 11 volvo 11 dodge 9 bmw 8 mercedes-benz 8 plymouth 7 audi 6 saab 6 porsche 4 chevrolet 3 jaguar 3 alfa-romero 3 renault 2 isuzu 2 mercury 1 Name: make, dtype: int64 gas 181 diesel 20 Name: fuel_type, dtype: int64 std 165 turbo 36 Name: aspiration, dtype: int64 four 114 two 87 Name: number_of_doors, dtype: int64 sedan 94 hatchback 68 wagon 25 hardtop 8 convertible 6 Name: body_style, dtype: int64 fwd 118 rwd 75 4wd 8 Name: drive_wheels, dtype: int64 front 198 rear 3 Name: engine_location, dtype: int64 ohc 145 ohcf 15 ohcv 13 l 12 dohc 12 rotor 4 Name: engine_type, dtype: int64 four 157 six 24 five 10 two 4 eight 4 three 1 twelve 1 Name: number_of_cylinders, dtype: int64 mpfi 92 2bbl 64 idi 20 1bbl 11 spdi 9 4bbl 3 mfi 1 spfi 1 Name: fuel_system, dtype: int64
# Grid of pairwise relationship plots across the dataset's columns.
sns.pairplot(automobile)
<seaborn.axisgrid.PairGrid at 0x2678d0d0f70>
# Annotated correlation heatmap of the numeric features.
# The frame still contains object (string) columns at this point, so the
# numeric columns are selected explicitly: pandas >= 2.0 raises on non-numeric
# data in corr() instead of silently dropping it.
plt.figure(figsize=(15,8))
sns.heatmap(automobile.select_dtypes(include="number").corr(),annot=True)
<AxesSubplot:>
import plotly.express as px
# Interactive scatter plots: peak_rpm against city_mpg, then height against width.
px.scatter(automobile,x="city_mpg",y="peak_rpm")
px.scatter(automobile,x="width",y="height")
# Distribution of price, the column used later as the regression target.
sns.displot(automobile["price"])
<seaborn.axisgrid.FacetGrid at 0x2679b606b80>
# Bar plot of price for each number_of_cylinders category, on a wide figure.
plt.figure(figsize=(15,5))
sns.barplot(data=automobile,x="number_of_cylinders",y="price")
<AxesSubplot:xlabel='number_of_cylinders', ylabel='price'>
# Count of cars per make (rows after the transpose) and cylinder count (columns),
# with "All" totals added by margins=True.
pd.crosstab(automobile["number_of_cylinders"],automobile["make"],margins=True).T
| number_of_cylinders | eight | five | four | six | three | twelve | two | All |
|---|---|---|---|---|---|---|---|---|
| make | ||||||||
| alfa-romero | 0 | 0 | 2 | 1 | 0 | 0 | 0 | 3 |
| audi | 0 | 5 | 1 | 0 | 0 | 0 | 0 | 6 |
| bmw | 0 | 0 | 2 | 6 | 0 | 0 | 0 | 8 |
| chevrolet | 0 | 0 | 2 | 0 | 1 | 0 | 0 | 3 |
| dodge | 0 | 0 | 9 | 0 | 0 | 0 | 0 | 9 |
| honda | 0 | 0 | 13 | 0 | 0 | 0 | 0 | 13 |
| isuzu | 0 | 0 | 2 | 0 | 0 | 0 | 0 | 2 |
| jaguar | 0 | 0 | 0 | 2 | 0 | 1 | 0 | 3 |
| mazda | 0 | 0 | 13 | 0 | 0 | 0 | 4 | 17 |
| mercedes-benz | 4 | 4 | 0 | 0 | 0 | 0 | 0 | 8 |
| mercury | 0 | 0 | 1 | 0 | 0 | 0 | 0 | 1 |
| mitsubishi | 0 | 0 | 13 | 0 | 0 | 0 | 0 | 13 |
| nissan | 0 | 0 | 12 | 6 | 0 | 0 | 0 | 18 |
| peugot | 0 | 0 | 11 | 0 | 0 | 0 | 0 | 11 |
| plymouth | 0 | 0 | 7 | 0 | 0 | 0 | 0 | 7 |
| porsche | 0 | 0 | 1 | 3 | 0 | 0 | 0 | 4 |
| renault | 0 | 0 | 2 | 0 | 0 | 0 | 0 | 2 |
| saab | 0 | 0 | 6 | 0 | 0 | 0 | 0 | 6 |
| subaru | 0 | 0 | 12 | 0 | 0 | 0 | 0 | 12 |
| toyota | 0 | 0 | 28 | 4 | 0 | 0 | 0 | 32 |
| volkswagen | 0 | 1 | 11 | 0 | 0 | 0 | 0 | 12 |
| volvo | 0 | 0 | 9 | 2 | 0 | 0 | 0 | 11 |
| All | 4 | 10 | 157 | 24 | 1 | 1 | 4 | 201 |
# Largest and smallest normalized_losses values, as a (max, min) tuple.
automobile["normalized_losses"].max(),automobile["normalized_losses"].min()
(256, 65)
# Bucket normalized_losses into 30-wide intervals: (60, 90], (90, 120], ... (240, 270].
loss_bins = list(range(60, 271, 30))
loss = pd.cut(automobile["normalized_losses"], bins=loss_bins)

# Aggregate the selected engine/price columns per make and loss bucket;
# margins=True appends an overall "All" row.
automobile.pivot_table(
    index=["make", loss],
    values=["price", "engine_size", "compression_ratio", "stroke", "bore"],
    margins=True,
)
| bore | compression_ratio | engine_size | price | stroke | ||
|---|---|---|---|---|---|---|
| make | normalized_losses | |||||
| alfa-romero | (150, 180] | 3.206667 | 9.000000 | 137.333333 | 15498.333333 | 2.943333 |
| audi | (150, 180] | 3.180000 | 8.633333 | 130.666667 | 17859.166667 | 3.400000 |
| bmw | (120, 150] | 3.542500 | 8.250000 | 197.750000 | 33380.000000 | 3.340000 |
| (180, 210] | 3.405000 | 8.900000 | 136.000000 | 18857.500000 | 2.995000 | |
| chevrolet | (60, 90] | 3.030000 | 9.600000 | 90.000000 | 6575.000000 | 3.110000 |
| (90, 120] | 3.030000 | 9.600000 | 90.000000 | 6295.000000 | 3.110000 | |
| (120, 150] | 2.910000 | 9.500000 | 61.000000 | 5151.000000 | 3.030000 | |
| dodge | (90, 120] | 3.077500 | 8.727500 | 100.000000 | 7206.750000 | 3.327500 |
| (120, 150] | 3.108000 | 8.560000 | 104.800000 | 8410.400000 | 3.396000 | |
| honda | (60, 90] | 3.092500 | 9.050000 | 105.500000 | 9845.000000 | 3.537500 |
| (90, 120] | 3.012857 | 9.257143 | 97.857143 | 7669.571429 | 3.434286 | |
| (120, 150] | 2.910000 | 9.400000 | 92.000000 | 6667.000000 | 3.410000 | |
| isuzu | (90, 120] | 3.370000 | 8.850000 | 115.000000 | 8916.500000 | 3.230000 |
| jaguar | (90, 120] | 3.585000 | 9.800000 | 292.000000 | 35775.000000 | 3.465000 |
| (120, 150] | 3.630000 | 8.100000 | 258.000000 | 32250.000000 | 4.170000 | |
| mazda | (90, 120] | 3.268182 | 11.227273 | 110.636364 | 9961.727273 | 3.282727 |
| (120, 150] | 3.316667 | 9.133333 | 89.000000 | 11920.000000 | 3.463333 | |
| mercedes-benz | (90, 120] | 3.580000 | 21.500000 | 183.000000 | 28394.000000 | 3.640000 |
| (120, 150] | 3.630000 | 8.150000 | 270.000000 | 38900.000000 | 3.225000 | |
| mercury | (120, 150] | 3.780000 | 8.000000 | 140.000000 | 16503.000000 | 3.120000 |
| mitsubishi | (120, 150] | 3.400000 | 7.571429 | 133.142857 | 10817.571429 | 3.631429 |
| (150, 180] | 3.076667 | 8.633333 | 101.000000 | 7399.000000 | 3.333333 | |
| nissan | (90, 120] | 3.303333 | 8.966667 | 132.666667 | 10290.666667 | 3.343333 |
| (120, 150] | 3.165000 | 10.912500 | 108.250000 | 7774.000000 | 3.310000 | |
| (150, 180] | 3.150000 | 9.400000 | 97.000000 | 8249.000000 | 3.290000 | |
| (180, 210] | 3.430000 | 8.400000 | 181.000000 | 18449.000000 | 3.270000 | |
| (210, 240] | 3.430000 | 9.000000 | 181.000000 | 18399.000000 | 3.270000 | |
| peugot | (150, 180] | 3.582727 | 14.000000 | 135.818182 | 15489.090909 | 3.160000 |
| plymouth | (60, 90] | 3.350000 | 8.500000 | 122.000000 | 8921.000000 | 3.460000 |
| (90, 120] | 3.000000 | 8.500000 | 94.000000 | 6764.500000 | 3.310000 | |
| (120, 150] | 3.590000 | 7.000000 | 156.000000 | 12764.000000 | 3.860000 | |
| (150, 180] | 2.970000 | 9.400000 | 92.666667 | 6843.333333 | 3.230000 | |
| porsche | (120, 150] | 3.740000 | 9.500000 | 194.000000 | 34528.000000 | 2.900000 |
| (180, 210] | 3.940000 | 9.500000 | 151.000000 | 22018.000000 | 3.110000 | |
| renault | (120, 150] | 3.460000 | 8.700000 | 132.000000 | 9595.000000 | 3.900000 |
| saab | (90, 120] | 3.540000 | 9.200000 | 121.000000 | 15433.333333 | 3.070000 |
| (120, 150] | 3.206667 | 9.203333 | 121.000000 | 15013.333333 | 2.736667 | |
| subaru | (60, 90] | 3.620000 | 8.728571 | 106.428571 | 8163.142857 | 2.600000 |
| (90, 120] | 3.620000 | 8.940000 | 108.000000 | 9070.600000 | 2.640000 | |
| toyota | (60, 90] | 3.184545 | 10.136364 | 111.727273 | 9132.727273 | 3.273636 |
| (90, 120] | 3.192500 | 12.375000 | 100.250000 | 7994.250000 | 3.110000 | |
| (120, 150] | 3.570000 | 9.285714 | 148.142857 | 12034.857143 | 3.478571 | |
| (150, 180] | 3.215000 | 9.200000 | 98.000000 | 8783.000000 | 3.055000 | |
| (180, 210] | 3.270000 | 9.300000 | 171.000000 | 16278.000000 | 3.350000 | |
| volkswagen | (90, 120] | 3.118000 | 14.800000 | 104.200000 | 8835.000000 | 3.400000 |
| (120, 150] | 3.130000 | 13.500000 | 109.500000 | 11129.166667 | 3.400000 | |
| (240, 270] | 3.190000 | 8.500000 | 109.000000 | 9980.000000 | 3.400000 | |
| volvo | (60, 90] | 3.726667 | 8.833333 | 137.333333 | 16293.333333 | 3.150000 |
| (90, 120] | 3.638750 | 10.750000 | 144.125000 | 18726.875000 | 3.146250 | |
| All | 3.329701 | 10.164279 | 126.875622 | 13207.129353 | 3.261741 |
# Plot the total price per make, with one line per fuel type.
# The aggregation is named by string because passing the builtin `sum` as
# aggfunc is deprecated in recent pandas releases.
automobile.pivot_table(values=["price"],index=["make"],columns="fuel_type",aggfunc="sum").plot()
plt.show()
# Per-make mean of price and the main size/performance columns, rounded to
# whole numbers.
# The columns are selected with a list: indexing a GroupBy with a bare tuple
# of names was deprecated and then removed in pandas 2.0.
automobile.groupby("make")[
    [
        "price",
        "normalized_losses",
        "length",
        "width",
        "height",
        "curb_weight",
        "horsepower",
        "peak_rpm",
        "city_mpg",
        "highway_mpg",
    ]
].mean().round()
| price | normalized_losses | length | width | height | curb_weight | horsepower | peak_rpm | city_mpg | highway_mpg | |
|---|---|---|---|---|---|---|---|---|---|---|
| make | ||||||||||
| alfa-romero | 15498.0 | 168.0 | 170.0 | 65.0 | 50.0 | 2640.0 | 125.0 | 5000.0 | 20.0 | 27.0 |
| audi | 17859.0 | 162.0 | 185.0 | 69.0 | 55.0 | 2759.0 | 114.0 | 5500.0 | 19.0 | 24.0 |
| bmw | 26119.0 | 170.0 | 184.0 | 66.0 | 55.0 | 2929.0 | 139.0 | 5069.0 | 19.0 | 25.0 |
| chevrolet | 6007.0 | 100.0 | 152.0 | 62.0 | 52.0 | 1757.0 | 63.0 | 5300.0 | 41.0 | 46.0 |
| dodge | 7875.0 | 133.0 | 161.0 | 64.0 | 52.0 | 2151.0 | 86.0 | 5389.0 | 28.0 | 34.0 |
| honda | 8185.0 | 103.0 | 161.0 | 64.0 | 53.0 | 2097.0 | 80.0 | 5754.0 | 30.0 | 35.0 |
| isuzu | 8916.0 | 110.0 | 172.0 | 64.0 | 52.0 | 2536.0 | 84.0 | 4900.0 | 24.0 | 29.0 |
| jaguar | 34600.0 | 125.0 | 197.0 | 70.0 | 51.0 | 4027.0 | 205.0 | 4833.0 | 14.0 | 18.0 |
| mazda | 10653.0 | 123.0 | 171.0 | 66.0 | 53.0 | 2298.0 | 86.0 | 5109.0 | 26.0 | 32.0 |
| mercedes-benz | 33647.0 | 114.0 | 195.0 | 71.0 | 56.0 | 3696.0 | 146.0 | 4488.0 | 18.0 | 21.0 |
| mercury | 16503.0 | 140.0 | 178.0 | 68.0 | 55.0 | 2910.0 | 175.0 | 5000.0 | 19.0 | 24.0 |
| mitsubishi | 9240.0 | 145.0 | 168.0 | 65.0 | 51.0 | 2382.0 | 104.0 | 5269.0 | 25.0 | 31.0 |
| nissan | 10416.0 | 135.0 | 171.0 | 65.0 | 54.0 | 2400.0 | 103.0 | 5178.0 | 27.0 | 33.0 |
| peugot | 15489.0 | 161.0 | 191.0 | 68.0 | 57.0 | 3221.0 | 100.0 | 4668.0 | 22.0 | 27.0 |
| plymouth | 7963.0 | 131.0 | 165.0 | 64.0 | 52.0 | 2221.0 | 87.0 | 5357.0 | 28.0 | 34.0 |
| porsche | 31400.0 | 142.0 | 169.0 | 66.0 | 51.0 | 2772.0 | 191.0 | 5800.0 | 18.0 | 26.0 |
| renault | 9595.0 | 129.0 | 179.0 | 67.0 | 53.0 | 2520.0 | 90.0 | 5500.0 | 23.0 | 31.0 |
| saab | 15223.0 | 127.0 | 187.0 | 66.0 | 56.0 | 2746.0 | 127.0 | 5333.0 | 20.0 | 27.0 |
| subaru | 8541.0 | 92.0 | 169.0 | 65.0 | 54.0 | 2316.0 | 86.0 | 4775.0 | 26.0 | 31.0 |
| toyota | 9886.0 | 111.0 | 172.0 | 65.0 | 54.0 | 2441.0 | 93.0 | 4859.0 | 28.0 | 33.0 |
| volkswagen | 10078.0 | 125.0 | 173.0 | 66.0 | 55.0 | 2343.0 | 81.0 | 5154.0 | 29.0 | 35.0 |
| volvo | 18063.0 | 91.0 | 189.0 | 68.0 | 56.0 | 3038.0 | 128.0 | 5291.0 | 21.0 | 26.0 |
# automobile.dtypes
# Cast every int64 column to float in place, printing each converted column's
# new dtype as confirmation. The column list is taken up front so the frame is
# not inspected while it is being modified.
for decimal in automobile.select_dtypes(include="int64").columns:
    automobile[decimal] = automobile[decimal].astype(float)
    print(automobile[decimal].dtypes)
float64 float64 float64 float64 float64 float64 float64 float64 float64
# Keep only the non-object columns; the string-valued columns are dropped
# rather than encoded.
automobile = automobile.select_dtypes(exclude=["object"])

# Target is price; the features are every remaining column.
y = automobile["price"]
x = automobile.drop(columns="price")
print(x, y)
symboling normalized_losses wheel_base length width height \
0 3.0 168.0 88.6 168.8 64.1 48.8
1 3.0 168.0 88.6 168.8 64.1 48.8
2 1.0 168.0 94.5 171.2 65.5 52.4
3 2.0 164.0 99.8 176.6 66.2 54.3
4 2.0 164.0 99.4 176.6 66.4 54.3
.. ... ... ... ... ... ...
196 -1.0 95.0 109.1 188.8 68.9 55.5
197 -1.0 95.0 109.1 188.8 68.8 55.5
198 -1.0 95.0 109.1 188.8 68.9 55.5
199 -1.0 95.0 109.1 188.8 68.9 55.5
200 -1.0 95.0 109.1 188.8 68.9 55.5
curb_weight engine_size bore stroke compression_ratio horsepower \
0 2548.0 130.0 3.47 2.68 9.0 111.0
1 2548.0 130.0 3.47 2.68 9.0 111.0
2 2823.0 152.0 2.68 3.47 9.0 154.0
3 2337.0 109.0 3.19 3.40 10.0 102.0
4 2824.0 136.0 3.19 3.40 8.0 115.0
.. ... ... ... ... ... ...
196 2952.0 141.0 3.78 3.15 9.5 114.0
197 3049.0 141.0 3.78 3.15 8.7 160.0
198 3012.0 173.0 3.58 2.87 8.8 134.0
199 3217.0 145.0 3.01 3.40 23.0 106.0
200 3062.0 141.0 3.78 3.15 9.5 114.0
peak_rpm city_mpg highway_mpg
0 5000.0 21.0 27.0
1 5000.0 21.0 27.0
2 5000.0 19.0 26.0
3 5500.0 24.0 30.0
4 5500.0 18.0 22.0
.. ... ... ...
196 5400.0 23.0 28.0
197 5300.0 19.0 25.0
198 5500.0 18.0 23.0
199 4800.0 26.0 27.0
200 5400.0 19.0 25.0
[201 rows x 15 columns] 0 13495.0
1 16500.0
2 16500.0
3 13950.0
4 17450.0
...
196 16845.0
197 19045.0
198 21485.0
199 22470.0
200 22625.0
Name: price, Length: 201, dtype: float64
from sklearn.model_selection import train_test_split
# Hold out 25% of the rows for evaluation; the fixed seed keeps the split
# repeatable between runs.
xtrain, xtest, ytrain, ytest = train_test_split(
    x,
    y,
    test_size=0.25,
    random_state=3,
)
print(xtrain.head())
symboling normalized_losses wheel_base length width height \
103 1.0 231.0 99.2 178.5 67.9 49.7
89 1.0 122.0 94.5 165.3 63.8 54.5
133 2.0 104.0 99.1 186.6 66.5 56.1
109 0.0 161.0 107.9 186.7 68.4 56.7
170 -1.0 65.0 102.4 175.6 66.5 54.9
curb_weight engine_size bore stroke compression_ratio horsepower \
103 3139.0 181.0 3.43 3.27 9.0 160.0
89 1938.0 97.0 3.15 3.29 9.4 69.0
133 2847.0 121.0 3.54 3.07 9.0 160.0
109 3252.0 152.0 3.70 3.52 21.0 95.0
170 2480.0 110.0 3.27 3.35 22.5 73.0
peak_rpm city_mpg highway_mpg
103 5200.0 19.0 25.0
89 5200.0 31.0 37.0
133 5500.0 19.0 26.0
109 4150.0 28.0 33.0
170 4500.0 30.0 33.0
# First rows of the held-out feature set.
print(xtest.head())
symboling normalized_losses wheel_base length width height \
40 0.0 85.0 96.5 175.4 65.2 54.1
51 1.0 113.0 93.1 166.8 64.2 54.1
140 0.0 102.0 97.0 172.0 65.4 54.3
132 3.0 150.0 99.1 186.6 66.5 56.1
171 -1.0 65.0 102.4 175.6 66.5 53.9
curb_weight engine_size bore stroke compression_ratio horsepower \
40 2465.0 110.0 3.15 3.58 9.0 101.0
51 1950.0 91.0 3.08 3.15 9.0 68.0
140 2385.0 108.0 3.62 2.64 9.0 82.0
132 2808.0 121.0 3.54 3.07 9.0 160.0
171 2414.0 122.0 3.31 3.54 8.7 92.0
peak_rpm city_mpg highway_mpg
40 5800.0 24.0 28.0
51 5000.0 31.0 38.0
140 4800.0 24.0 25.0
132 5500.0 19.0 26.0
171 4200.0 27.0 32.0
from sklearn.tree import DecisionTreeRegressor
# Fit a decision tree regressor on the training split.
# random_state is fixed because scikit-learn permutes the features at every
# split even with the default "best" splitter, so an unseeded tree can differ
# between runs. The split and the random forest in this notebook are seeded too.
dtr=DecisionTreeRegressor(random_state=3)
dtr.fit(xtrain,ytrain)
DecisionTreeRegressor()
# Decision tree predictions for the held-out rows (ypred, no underscore).
ypred=dtr.predict(xtest)
ypred
array([ 8013., 6695., 11259., 18620., 11248., 8845., 6938., 45400.,
45400., 13499., 17669., 9370., 6692., 7898., 9298., 15998.,
7499., 8948., 9995., 8845., 36880., 33278., 20970., 16845.,
8013., 9298., 15750., 8449., 18399., 6649., 28176., 6938.,
9258., 7099., 9095., 9980., 11900., 18420., 28248., 5389.,
12170., 5389., 6575., 36880., 9370., 12170., 12964., 18920.,
6229., 7788., 6295.])
# Side-by-side table of true prices and decision tree predictions, indexed like ytest.
pd.DataFrame({"Actual":ytest,"Predicted":ypred})
| Actual | Predicted | |
|---|---|---|
| 40 | 12945.0 | 8013.0 |
| 51 | 7395.0 | 6695.0 |
| 140 | 9233.0 | 11259.0 |
| 132 | 18150.0 | 18620.0 |
| 171 | 9988.0 | 11248.0 |
| 82 | 6989.0 | 8845.0 |
| 153 | 7198.0 | 6938.0 |
| 46 | 36000.0 | 45400.0 |
| 70 | 40960.0 | 45400.0 |
| 100 | 13499.0 | 13499.0 |
| 151 | 8778.0 | 17669.0 |
| 83 | 8189.0 | 9370.0 |
| 25 | 7609.0 | 6692.0 |
| 139 | 9960.0 | 7898.0 |
| 160 | 8238.0 | 9298.0 |
| 174 | 16558.0 | 15998.0 |
| 95 | 8249.0 | 7499.0 |
| 3 | 13950.0 | 8948.0 |
| 41 | 10345.0 | 9995.0 |
| 58 | 10595.0 | 8845.0 |
| 14 | 30760.0 | 36880.0 |
| 125 | 37028.0 | 33278.0 |
| 12 | 21105.0 | 20970.0 |
| 6 | 17710.0 | 16845.0 |
| 143 | 10198.0 | 8013.0 |
| 162 | 9538.0 | 9298.0 |
| 176 | 15690.0 | 15750.0 |
| 164 | 9639.0 | 8449.0 |
| 101 | 17199.0 | 18399.0 |
| 86 | 5499.0 | 6649.0 |
| 64 | 25552.0 | 28176.0 |
| 47 | 5195.0 | 6938.0 |
| 159 | 8058.0 | 9258.0 |
| 34 | 7295.0 | 7099.0 |
| 38 | 8845.0 | 9095.0 |
| 185 | 11595.0 | 9980.0 |
| 4 | 17450.0 | 11900.0 |
| 72 | 16503.0 | 18420.0 |
| 67 | 31600.0 | 28248.0 |
| 146 | 5348.0 | 5389.0 |
| 128 | 11850.0 | 12170.0 |
| 115 | 5572.0 | 5389.0 |
| 156 | 7738.0 | 6575.0 |
| 15 | 41315.0 | 36880.0 |
| 61 | 11245.0 | 9370.0 |
| 130 | 15040.0 | 12170.0 |
| 121 | 12764.0 | 12964.0 |
| 198 | 21485.0 | 18920.0 |
| 23 | 6229.0 | 6229.0 |
| 154 | 7898.0 | 7788.0 |
| 31 | 5399.0 | 6295.0 |
from sklearn import metrics
# Error metrics for the decision tree on the held-out set, rounded to whole numbers.
tree_mae = metrics.mean_absolute_error(ytest, ypred)
tree_mse = metrics.mean_squared_error(ytest, ypred)
print("mean absolute error:", round(tree_mae))
print("mean squared error:", round(tree_mse))
print("root mean squared error:", round(np.sqrt(tree_mse)))
mean absolute error: 1919 mean squared error: 8225198 root mean squared error: 2868
# Histogram of the target prices.
plt.hist(y)
(array([81., 45., 35., 17., 6., 3., 4., 7., 2., 1.]),
array([ 5118. , 9146.2, 13174.4, 17202.6, 21230.8, 25259. , 29287.2,
33315.4, 37343.6, 41371.8, 45400. ]),
<BarContainer object of 10 artists>)
# Overlay the density of the actual prices (red) and the decision tree's
# predictions (blue).
# kdeplot is the supported replacement for the deprecated distplot(hist=False),
# and the explicit legend is what makes the two labels appear on the figure.
# NOTE(review): "Actual" is drawn from all of y while the predictions cover
# only the test rows; ytest may be the fairer comparison. Confirm intent.
plt.figure(figsize=(15,5))
sns.kdeplot(x=y,label="Actual",color="r")
sns.kdeplot(x=ypred,label="Predicted",color="b")
plt.legend()
plt.show()
# plt.close()
from sklearn import tree
# Draw the fitted decision tree with filled, rounded nodes and a small font.
plt.figure(figsize=(15,8))
tree.plot_tree(dtr,filled=True,rounded=True,fontsize=7)
plt.show()
from sklearn.ensemble import RandomForestRegressor
# Random forest of 15 trees with a fixed seed, fitted on the same training
# split as the decision tree.
rfr=RandomForestRegressor(n_estimators=15,random_state=4)
rfr.fit(xtrain,ytrain)
RandomForestRegressor(n_estimators=15, random_state=4)
# Random forest predictions for the held-out rows.
# Note the underscore: y_pred is the forest, ypred is the decision tree.
y_pred=rfr.predict(xtest)
y_pred
array([11063.73333333, 6700.8 , 9798.73333333, 18254.73333333,
10915.93333333, 8735.08888889, 7740.8 , 37940. ,
40052.93333333, 15662.93333333, 17682.46666667, 8676.88888889,
6604.66666667, 8946.53333333, 8435.2 , 17125.86666667,
7495.66666667, 9705.96666667, 9263.33333333, 8860.82222222,
33571.26666667, 31249.93333333, 19366.86666667, 19862.53333333,
9339. , 10020.16666667, 17016.8 , 8964.73333333,
19401. , 6885.66666667, 28512.73333333, 6482.86666667,
8531.4 , 7166. , 9582.66666667, 9613.4 ,
16411.13333333, 17341.46666667, 32338.86666667, 6567.73333333,
13807.16666667, 6120.46666667, 7308.66666667, 33196.93333333,
9256.94444444, 13772.33333333, 13545.66666667, 20355.6 ,
6453.6 , 8038.26666667, 5872.06666667])
# Side-by-side table of true prices and random forest predictions, indexed like ytest.
pd.DataFrame({"Actual": ytest,"Prediction":y_pred})
| Actual | Prediction | |
|---|---|---|
| 40 | 12945.0 | 11063.733333 |
| 51 | 7395.0 | 6700.800000 |
| 140 | 9233.0 | 9798.733333 |
| 132 | 18150.0 | 18254.733333 |
| 171 | 9988.0 | 10915.933333 |
| 82 | 6989.0 | 8735.088889 |
| 153 | 7198.0 | 7740.800000 |
| 46 | 36000.0 | 37940.000000 |
| 70 | 40960.0 | 40052.933333 |
| 100 | 13499.0 | 15662.933333 |
| 151 | 8778.0 | 17682.466667 |
| 83 | 8189.0 | 8676.888889 |
| 25 | 7609.0 | 6604.666667 |
| 139 | 9960.0 | 8946.533333 |
| 160 | 8238.0 | 8435.200000 |
| 174 | 16558.0 | 17125.866667 |
| 95 | 8249.0 | 7495.666667 |
| 3 | 13950.0 | 9705.966667 |
| 41 | 10345.0 | 9263.333333 |
| 58 | 10595.0 | 8860.822222 |
| 14 | 30760.0 | 33571.266667 |
| 125 | 37028.0 | 31249.933333 |
| 12 | 21105.0 | 19366.866667 |
| 6 | 17710.0 | 19862.533333 |
| 143 | 10198.0 | 9339.000000 |
| 162 | 9538.0 | 10020.166667 |
| 176 | 15690.0 | 17016.800000 |
| 164 | 9639.0 | 8964.733333 |
| 101 | 17199.0 | 19401.000000 |
| 86 | 5499.0 | 6885.666667 |
| 64 | 25552.0 | 28512.733333 |
| 47 | 5195.0 | 6482.866667 |
| 159 | 8058.0 | 8531.400000 |
| 34 | 7295.0 | 7166.000000 |
| 38 | 8845.0 | 9582.666667 |
| 185 | 11595.0 | 9613.400000 |
| 4 | 17450.0 | 16411.133333 |
| 72 | 16503.0 | 17341.466667 |
| 67 | 31600.0 | 32338.866667 |
| 146 | 5348.0 | 6567.733333 |
| 128 | 11850.0 | 13807.166667 |
| 115 | 5572.0 | 6120.466667 |
| 156 | 7738.0 | 7308.666667 |
| 15 | 41315.0 | 33196.933333 |
| 61 | 11245.0 | 9256.944444 |
| 130 | 15040.0 | 13772.333333 |
| 121 | 12764.0 | 13545.666667 |
| 198 | 21485.0 | 20355.600000 |
| 23 | 6229.0 | 6453.600000 |
| 154 | 7898.0 | 8038.266667 |
| 31 | 5399.0 | 5872.066667 |
# Error metrics for the random forest on the held-out set, rounded to whole numbers.
# Every metric must be computed from y_pred (the forest's predictions).
# ypred, without the underscore, holds the decision tree's predictions and
# would just repeat the tree's scores here.
print("MSE :", round(metrics.mean_squared_error(ytest,y_pred)))
print("MAE:", round(metrics.mean_absolute_error(ytest,y_pred)))
print("SRMSE :",round( np.sqrt(metrics.mean_squared_error(ytest,y_pred))))
MSE : 5457224 MAE: 1919 SRMSE : 2868
# Overlay the density of the actual prices (red) and the random forest's
# predictions (blue).
# kdeplot is the supported replacement for the deprecated distplot(hist=False);
# the labels and legend identify the two curves.
# NOTE(review): "Actual" is drawn from all of y while the predictions cover
# only the test rows; ytest may be the fairer comparison. Confirm intent.
plt.figure(figsize=(15,5))
sns.kdeplot(x=y,label="Actual",color="red")
sns.kdeplot(x=y_pred,label="Predicted",color="blue")
plt.legend()
plt.show()